from pyforest import *
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
from plotly.subplots import make_subplots
import sidetable
import math
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import geopandas as gpd
import emoji
import warnings
warnings.filterwarnings('ignore')
# Location of the FIFA 23 players table in the project's GitHub repository.
FIFA_CSV_URL = (
    "https://raw.githubusercontent.com/EylonYehiel/projects/main/Data/"
    "Fifa%2023%20Players%20Data.csv"
)
fifa = pd.read_csv(FIFA_CSV_URL)
# Peek at five randomly chosen rows (rendered as the cell output).
fifa.sample(n=5)
| Known As | Full Name | Overall | Potential | Value(in Euro) | Positions Played | Best Position | Nationality | Image Link | Age | ... | LM Rating | CM Rating | RM Rating | LWB Rating | CDM Rating | RWB Rating | LB Rating | CB Rating | RB Rating | GK Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11433 | I. Wadji | Ibrahima Wadji | 64 | 65 | 675000 | ST | ST | Senegal | https://cdn.sofifa.net/players/240/497/23_60.png | 27 | ... | 65 | 56 | 65 | 51 | 46 | 51 | 48 | 43 | 48 | 18 |
| 0 | L. Messi | Lionel Messi | 91 | 91 | 54000000 | RW | CAM | Argentina | https://cdn.sofifa.net/players/158/023/23_60.png | 35 | ... | 91 | 88 | 91 | 67 | 66 | 67 | 62 | 53 | 62 | 22 |
| 2815 | J. Haberer | Janik Haberer | 73 | 73 | 3000000 | CM,CDM,RM | CM | Germany | https://cdn.sofifa.net/players/211/879/23_60.png | 28 | ... | 72 | 73 | 72 | 70 | 72 | 70 | 68 | 67 | 68 | 23 |
| 3027 | A. Bamba | Abdoul Kader Bamba | 72 | 72 | 2400000 | LM,RM,CM | LM | France | https://cdn.sofifa.net/players/246/790/23_60.png | 28 | ... | 72 | 68 | 72 | 61 | 57 | 61 | 58 | 50 | 58 | 19 |
| 12078 | Han Jiaqi | Jiaqi Han | 63 | 73 | 925000 | GK | GK | China PR | https://cdn.sofifa.net/players/253/955/23_60.png | 22 | ... | 29 | 33 | 29 | 28 | 33 | 28 | 28 | 32 | 28 | 64 |
5 rows × 89 columns
# Dimensions of the dataset as (rows, columns).
fifa.shape
(18539, 89)
# Sum sidetable's missing-value summary over all columns to get dataset-wide totals.
fifa.stb.missing().sum()
missing 0.0 total 1649971.0 percent 0.0 dtype: float64
# List every column name available in the dataset.
fifa.columns
Index(['Known As', 'Full Name', 'Overall', 'Potential', 'Value(in Euro)',
'Positions Played', 'Best Position', 'Nationality', 'Image Link', 'Age',
'Height(in cm)', 'Weight(in kg)', 'TotalStats', 'BaseStats',
'Club Name', 'Wage(in Euro)', 'Release Clause', 'Club Position',
'Contract Until', 'Club Jersey Number', 'Joined On', 'On Loan',
'Preferred Foot', 'Weak Foot Rating', 'Skill Moves',
'International Reputation', 'National Team Name',
'National Team Image Link', 'National Team Position',
'National Team Jersey Number', 'Attacking Work Rate',
'Defensive Work Rate', 'Pace Total', 'Shooting Total', 'Passing Total',
'Dribbling Total', 'Defending Total', 'Physicality Total', 'Crossing',
'Finishing', 'Heading Accuracy', 'Short Passing', 'Volleys',
'Dribbling', 'Curve', 'Freekick Accuracy', 'LongPassing', 'BallControl',
'Acceleration', 'Sprint Speed', 'Agility', 'Reactions', 'Balance',
'Shot Power', 'Jumping', 'Stamina', 'Strength', 'Long Shots',
'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties',
'Composure', 'Marking', 'Standing Tackle', 'Sliding Tackle',
'Goalkeeper Diving', 'Goalkeeper Handling', ' GoalkeeperKicking',
'Goalkeeper Positioning', 'Goalkeeper Reflexes', 'ST Rating',
'LW Rating', 'LF Rating', 'CF Rating', 'RF Rating', 'RW Rating',
'CAM Rating', 'LM Rating', 'CM Rating', 'RM Rating', 'LWB Rating',
'CDM Rating', 'RWB Rating', 'LB Rating', 'CB Rating', 'RB Rating',
'GK Rating'],
dtype='object')
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
fifa.describe()
| Overall | Potential | Value(in Euro) | Age | Height(in cm) | Weight(in kg) | TotalStats | BaseStats | Wage(in Euro) | Release Clause | ... | LM Rating | CM Rating | RM Rating | LWB Rating | CDM Rating | RWB Rating | LB Rating | CB Rating | RB Rating | GK Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 18539.000000 | 18539.000000 | 1.853900e+04 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 1.853900e+04 | ... | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 | 18539.000000 |
| mean | 65.852042 | 71.016668 | 2.875461e+06 | 25.240412 | 181.550839 | 75.173904 | 1602.114569 | 357.946221 | 8824.537462 | 5.081688e+06 | ... | 58.451319 | 57.374076 | 58.451319 | 56.281569 | 55.928583 | 56.281569 | 55.650251 | 54.528184 | 55.650251 | 23.257134 |
| std | 6.788353 | 6.192866 | 7.635129e+06 | 4.718163 | 6.858097 | 7.013593 | 273.160237 | 39.628259 | 19460.531154 | 1.467203e+07 | ... | 13.987122 | 13.171194 | 13.987122 | 13.903836 | 13.872190 | 13.903836 | 14.159466 | 14.743929 | 14.159466 | 15.108925 |
| min | 47.000000 | 48.000000 | 0.000000e+00 | 16.000000 | 155.000000 | 49.000000 | 759.000000 | 224.000000 | 0.000000 | 0.000000e+00 | ... | 18.000000 | 18.000000 | 18.000000 | 17.000000 | 19.000000 | 17.000000 | 17.000000 | 18.000000 | 17.000000 | 10.000000 |
| 25% | 62.000000 | 67.000000 | 4.750000e+05 | 21.000000 | 177.000000 | 70.000000 | 1470.000000 | 331.000000 | 1000.000000 | 6.650000e+05 | ... | 54.000000 | 53.000000 | 54.000000 | 51.000000 | 48.000000 | 51.000000 | 49.000000 | 45.000000 | 49.000000 | 17.000000 |
| 50% | 66.000000 | 71.000000 | 1.000000e+06 | 25.000000 | 182.000000 | 75.000000 | 1640.000000 | 358.000000 | 3000.000000 | 1.500000e+06 | ... | 62.000000 | 60.000000 | 62.000000 | 59.000000 | 59.000000 | 59.000000 | 59.000000 | 58.000000 | 59.000000 | 18.000000 |
| 75% | 70.000000 | 75.000000 | 2.000000e+06 | 29.000000 | 186.000000 | 80.000000 | 1786.000000 | 385.000000 | 8000.000000 | 3.400000e+06 | ... | 67.000000 | 66.000000 | 67.000000 | 66.000000 | 66.000000 | 66.000000 | 65.000000 | 66.000000 | 65.000000 | 20.000000 |
| max | 91.000000 | 95.000000 | 1.905000e+08 | 44.000000 | 206.000000 | 105.000000 | 2312.000000 | 502.000000 | 450000.000000 | 3.667000e+08 | ... | 92.000000 | 91.000000 | 92.000000 | 88.000000 | 89.000000 | 88.000000 | 87.000000 | 90.000000 | 87.000000 | 90.000000 |
8 rows × 71 columns
def bold(text):
    """Return *text* wrapped in ANSI escape codes for bold terminal output.

    Args:
        text: Any object; it is rendered through f-string formatting, so
            numbers and other non-string values are accepted.

    Returns:
        A string of the form ``"\\033[1m" + formatted text + "\\033[0m"``.
    """
    return f"\033[1m{text}\033[0m"
# One histogram trace per physical attribute, all drawn on the same figure.
# Each entry is (dataset column, legend name, extra Histogram keyword arguments).
fig = go.Figure()
for _column, _legend, _extra in (
    ('Age', 'Age', {'xbins': dict(size=1)}),  # one bin per year of age
    ('Height(in cm)', 'Height (cm)', {}),
    ('Weight(in kg)', 'Weight (kg)', {}),
):
    fig.add_trace(go.Histogram(x=fifa[_column], name=_legend, **_extra))
# Kept as the last expression so the notebook renders the returned figure.
fig.update_layout(
    height=600,
    width=1100,
    title='Histograms of Attributes :',
    xaxis_title_text='Attribute Value',
    yaxis_title_text='Frequency',
)
# Scatter-plot matrix (pair-plot) built from the first 1000 rows of the dataset.
df = pd.DataFrame(fifa.head(1000))
numeric_columns = ['Overall', 'Value(in Euro)', 'Wage(in Euro)', 'Age']
pxq = px.colors.qualitative

splom_trace = go.Splom(
    # One matrix dimension per numeric column.
    dimensions=[{'label': name, 'values': df[name]} for name in numeric_columns],
    diagonal={'visible': True},
    # The Pastel palette is repeated so the colour list is longer than the point count.
    marker_color=pxq.Pastel * 1000,
    # Marker size is each player's age divided by three.
    marker_size=df['Age'] / 3,
    # Hover text shows the player's full name.
    text=df['Full Name'],
)
fig = go.Figure(data=splom_trace)
fig.update_layout(title='Pair-plot of Attributes:', height=900)
fig.show()
# Pairwise correlations between the pair-plot attributes (df holds only the first 1000 rows).
df[numeric_columns].corr()
| Overall | Value(in Euro) | Wage(in Euro) | Age | |
|---|---|---|---|---|
| Overall | 1.000000 | 0.778583 | 0.725597 | 0.076431 |
| Value(in Euro) | 0.778583 | 1.000000 | 0.688834 | -0.400339 |
| Wage(in Euro) | 0.725597 | 0.688834 | 1.000000 | -0.034584 |
| Age | 0.076431 | -0.400339 | -0.034584 | 1.000000 |
# Compare the preferred foot of the players with a reference right/left split.
foot = fifa['Preferred Foot'].value_counts()

# Look the counts up by label: positional access (foot[0]) on a string-indexed
# Series is deprecated in recent pandas and silently depends on 'Right' being
# the most frequent value.
trace1 = go.Pie(labels=['Right', 'Left'], values=[foot['Right'], foot['Left']],
                hoverinfo='label+value', textinfo='percent', textfont=dict(size=18),
                marker=dict(colors=['#DC143C', '#FFA500'], line=dict(color='#FFFFFF', width=4)),
                pull=[0, 0.1])

left_percentage = 11.5  # Average percentage of left-handed people (Wikipedia)
right_percentage = 88.5
# NOTE(review): the reference figure is about handedness while the dataset
# records footedness -- confirm the comparison is intended.
trace2 = go.Pie(labels=['Right', 'Left'], values=[right_percentage, left_percentage],
                hoverinfo='label+percent', textinfo='percent', textfont=dict(size=18),
                marker=dict(colors=['#DC143C', '#FFA500'], line=dict(color='#FFFFFF', width=4)),
                pull=[0, 0.1])

# Column 1 holds the population pie (trace2), column 2 the footballers pie
# (trace1), matching the order of the subplot titles.
fig = make_subplots(1, 2, specs=[[{'type': 'domain'}, {'type': 'domain'}]],
                    subplot_titles=['Population', 'Footballers'])
fig.add_trace(trace1, 1, 2)
fig.add_trace(trace2, 1, 1)
fig.update_layout(title_text='Strong side (Right or Left) - football players VS whole population :',
                  height=600, width=1000)
fig.show()
# Draw one stacked-by-position histogram per attribute.
# Each entry is (dataset column, number of bins, figure title).
for _column, _bins, _title in (
    ("Age", 55, 'Ages distribution by positions'),
    ("Weight(in kg)", 55, 'Weight distribution by positions'),
    ("Height(in cm)", 50, 'Height distribution by positions'),
):
    fig = px.histogram(fifa, x=_column, color="Best Position", nbins=_bins)
    fig.update_layout(
        height=450,
        width=1100,
        title=_title,
        yaxis_title_text='Frequency',
    )
    fig.show()
# Heatmap of the pairwise correlations between all numeric attributes.
fig, ax = plt.subplots(figsize=(30, 24))
# The dataset contains text columns (names, clubs, positions, ...). Since
# pandas 2.0 DataFrame.corr() no longer drops them silently and raises instead,
# so restrict the computation to numeric columns explicitly.
correlations = fifa.corr(numeric_only=True)
# NOTE: fmt only takes effect when annotations (annot=True) are enabled.
sns.heatmap(correlations, cmap='coolwarm', vmin=-1, vmax=1, fmt='.2f', linewidths=0.05, ax=ax)
ax.set_title('Correlation Matrix of All Attributes :\n', fontsize=25)
plt.xticks(fontsize=17)
plt.yticks(fontsize=17)
# Enlarge the tick labels of the colour bar that heatmap() attached to the axes.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=15)
plt.show()
# Correlation heatmap restricted to a hand-picked subset of attributes.
# The colour scale spans 0..1, so negative correlations sit at the bottom colour.
fig = px.imshow(
    fifa[[
        'Overall',
        'Reactions',
        'Composure',
        'Passing Total',
        'Wage(in Euro)',
        'Value(in Euro)',
        'Shot Power',
        'Age',
    ]].corr(),
    color_continuous_scale='OrRd',
    zmin=0,
    zmax=1,
)
fig.update_layout(width=800, height=600, title='Strong Connnections Zoom-in:')
fig.show()
# Fit a random forest that predicts a player's overall rating from five attributes.
features = ['Reactions', 'Composure', 'Passing Total', 'Wage(in Euro)', 'Age']
target = 'Overall'

# Hold out 20% of the players for evaluation; fixed seeds keep runs repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    fifa[features],
    fifa[target],
    test_size=0.2,
    random_state=42,
)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out players: root mean squared error and R^2.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2_score = model.score(X_test, y_test)
print(f"RMSE: {bold(rmse)}")
print(f"r^2: {bold(r2_score)}")
RMSE: 2.397600706049064 r^2: 0.8728423345934998
# Report how much each input feature contributes to the fitted forest.
importances = model.feature_importances_
print("Feature Importances:")
# feature_importances_ follows the column order the model was trained on.
for feature, importance in zip(features, importances):
    print(feature, importance, sep=": ")
Feature Importances: Reactions: 0.7533940990013311 Composure: 0.047712616582223796 Passing Total: 0.07441100885822209 Wage(in Euro): 0.08892444372035126 Age: 0.03555783183787177
# Scatter the held-out true ratings against the model's predictions, with an OLS trend line.
fig = px.scatter(
    x=y_test,
    y=y_pred,
    trendline='ols',
    title='Actual vs. Predicted Overall Ratings:',
    labels=dict(x='Actual Score', y='Predicted Score'),
)
fig.update_layout(height=500)
fig.show()
def pred(Reactions, Composure, Pass, Wage, Age):
    """Print the overall rating the fitted model predicts for one player.

    Args:
        Reactions: Value for the 'Reactions' feature.
        Composure: Value for the 'Composure' feature.
        Pass: Value for the 'Passing Total' feature.
        Wage: Value for the 'Wage(in Euro)' feature.
        Age: Value for the 'Age' feature.

    The arguments are given in the same order as the module-level ``features``
    list the module-level ``model`` was trained on.
    """
    values = (Reactions, Composure, Pass, Wage, Age)
    # The model was fitted on a DataFrame, so predict on one with the same
    # column names instead of a bare nested list (which drops the names).
    sample = pd.DataFrame([values], columns=features)
    p = model.predict(sample)
    # predict() returns a one-element array; index it explicitly because
    # float() on a non-0-d array is deprecated in recent NumPy.
    print(f"Overall score for {values} values is : {float(p[0])}")


pred(81, 64, 90, 50000, 28)
pred(78, 86, 70, 100000, 30)
pred(94, 90, 60, 700000, 21)
pred(94, 80, 80, 60000, 22)
Overall score for (81, 64, 90, 50000, 28) values is : 83.17 Overall score for (78, 86, 70, 100000, 30) values is : 79.7 Overall score for (94, 90, 60, 700000, 21) values is : 90.52 Overall score for (94, 80, 80, 60000, 22) values is : 85.2
# Columns shown in the "top players" tables.
general_features = [
    'Full Name', 'Age', 'Height(in cm)', 'Weight(in kg)', 'Nationality', 'Overall', 'Potential',
    'Value(in Euro)', 'Wage(in Euro)', 'Sprint Speed', 'Defending Total', 'Dribbling Total',
    'Pace Total', 'Shooting Total', 'Passing Total', 'Physicality Total', 'Best Position',
    'Club Name', 'Preferred Foot', 'BaseStats', 'TotalStats', 'International Reputation',
    'Weak Foot Rating',
]
# Outfield players only: every row whose best position is not goalkeeper.
fields = fifa.loc[fifa['Best Position'] != 'GK', general_features]
# Ten best outfield players, ranked by overall rating, then total stats, then value.
fields.sort_values(
    by=['Overall', 'TotalStats', 'Value(in Euro)'],
    ascending=False,
).head(10)
| Full Name | Age | Height(in cm) | Weight(in kg) | Nationality | Overall | Potential | Value(in Euro) | Wage(in Euro) | Sprint Speed | ... | Shooting Total | Passing Total | Physicality Total | Best Position | Club Name | Preferred Foot | BaseStats | TotalStats | International Reputation | Weak Foot Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | Kevin De Bruyne | 31 | 181 | 70 | Belgium | 91 | 91 | 107500000 | 350000 | 73 | ... | 88 | 93 | 77 | CM | Manchester City | Right | 483 | 2303 | 4 | 5 |
| 2 | Robert Lewandowski | 33 | 185 | 81 | Poland | 91 | 91 | 84000000 | 420000 | 75 | ... | 91 | 79 | 83 | ST | FC Barcelona | Right | 458 | 2205 | 5 | 4 |
| 0 | Lionel Messi | 35 | 169 | 67 | Argentina | 91 | 91 | 54000000 | 195000 | 76 | ... | 89 | 90 | 64 | CAM | Paris Saint-Germain | Left | 452 | 2190 | 5 | 4 |
| 4 | Kylian Mbappé | 23 | 182 | 73 | France | 91 | 95 | 190500000 | 230000 | 97 | ... | 89 | 80 | 76 | ST | Paris Saint-Germain | Right | 470 | 2177 | 4 | 4 |
| 1 | Karim Benzema | 34 | 185 | 81 | France | 91 | 91 | 64000000 | 450000 | 80 | ... | 88 | 83 | 78 | CF | Real Madrid CF | Right | 455 | 2147 | 4 | 4 |
| 5 | Mohamed Salah | 30 | 175 | 71 | Egypt | 90 | 90 | 115500000 | 270000 | 91 | ... | 89 | 82 | 75 | RW | Liverpool | Left | 471 | 2226 | 4 | 3 |
| 8 | C. Ronaldo dos Santos Aveiro | 37 | 187 | 83 | Portugal | 90 | 90 | 41000000 | 220000 | 83 | ... | 92 | 78 | 75 | ST | Manchester United | Right | 445 | 2159 | 5 | 4 |
| 9 | Virgil van Dijk | 30 | 193 | 92 | Netherlands | 90 | 90 | 98000000 | 230000 | 91 | ... | 60 | 71 | 86 | CB | Liverpool | Right | 461 | 2117 | 4 | 3 |
| 17 | Joshua Kimmich | 27 | 177 | 75 | Germany | 89 | 90 | 105500000 | 130000 | 60 | ... | 72 | 87 | 79 | CDM | FC Bayern München | Right | 473 | 2283 | 4 | 4 |
| 13 | Carlos Henrique Venancio Casimiro | 30 | 185 | 84 | Brazil | 89 | 89 | 86000000 | 240000 | 66 | ... | 73 | 75 | 90 | CDM | Manchester United | Right | 460 | 2209 | 3 | 3 |
10 rows × 23 columns
# Goalkeepers only (all columns are kept on this frame).
goalies = fifa.loc[fifa['Best Position'].eq('GK')]
# Ten best goalkeepers, ranked by overall rating, then total stats, then value.
goalies.loc[:, general_features].sort_values(
    by=['Overall', 'TotalStats', 'Value(in Euro)'],
    ascending=False,
).head(10)
| Full Name | Age | Height(in cm) | Weight(in kg) | Nationality | Overall | Potential | Value(in Euro) | Wage(in Euro) | Sprint Speed | ... | Shooting Total | Passing Total | Physicality Total | Best Position | Club Name | Preferred Foot | BaseStats | TotalStats | International Reputation | Weak Foot Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7 | Manuel Neuer | 36 | 193 | 93 | Germany | 90 | 90 | 13500000 | 72000 | 60 | ... | 88 | 91 | 91 | GK | FC Bayern München | Right | 501 | 1535 | 5 | 4 |
| 6 | Thibaut Courtois | 30 | 199 | 96 | Belgium | 90 | 91 | 90000000 | 250000 | 52 | ... | 89 | 75 | 89 | GK | Real Madrid CF | Left | 473 | 1334 | 4 | 3 |
| 16 | Ederson Santana de Moraes | 28 | 188 | 86 | Brazil | 89 | 91 | 88000000 | 210000 | 63 | ... | 82 | 93 | 88 | GK | Manchester City | Left | 502 | 1583 | 3 | 3 |
| 18 | Alisson Ramses Becker | 29 | 191 | 91 | Brazil | 89 | 90 | 79000000 | 190000 | 49 | ... | 85 | 85 | 90 | GK | Liverpool | Right | 489 | 1437 | 3 | 3 |
| 14 | Jan Oblak | 29 | 188 | 87 | Slovenia | 89 | 91 | 85500000 | 100000 | 58 | ... | 90 | 78 | 87 | GK | Atlético de Madrid | Right | 479 | 1402 | 5 | 3 |
| 27 | Marc-André ter Stegen | 30 | 187 | 85 | Germany | 88 | 89 | 68500000 | 210000 | 50 | ... | 85 | 87 | 85 | GK | FC Barcelona | Right | 480 | 1443 | 4 | 4 |
| 26 | Keylor Navas | 35 | 185 | 80 | Costa Rica | 88 | 88 | 10000000 | 85000 | 53 | ... | 84 | 75 | 87 | GK | Paris Saint-Germain | Right | 478 | 1428 | 3 | 3 |
| 22 | Gianluigi Donnarumma | 23 | 196 | 90 | Italy | 88 | 92 | 103500000 | 110000 | 55 | ... | 83 | 79 | 85 | GK | Paris Saint-Germain | Right | 478 | 1375 | 3 | 3 |
| 37 | Mike Maignan | 26 | 191 | 89 | France | 87 | 90 | 80000000 | 90000 | 53 | ... | 82 | 85 | 85 | GK | AC Milan | Right | 477 | 1496 | 2 | 4 |
| 35 | David De Gea Quintana | 31 | 192 | 76 | Spain | 87 | 87 | 42000000 | 150000 | 50 | ... | 80 | 76 | 84 | GK | Manchester United | Right | 468 | 1415 | 4 | 3 |
10 rows × 23 columns
# Bar chart of the 20 nationalities with the most players in the dataset.
best = fifa.sort_values(['Overall', 'TotalStats', 'Value(in Euro)'], ascending=False)
# value_counts() orders by frequency, so head(20) yields the 20 most common nationalities.
x = best.Nationality.value_counts().head(20)
colors = ['red', 'gold', 'orange', 'blue', 'turquoise', 'green', 'indigo', 'purple', 'brown', 'teal']
# The palette is shorter than the number of bars; cycle through it so every
# bar gets a palette colour instead of only the first len(colors) bars.
bar_colors = [colors[i % len(colors)] for i in range(len(x))]
fig = go.Figure([go.Bar(x=x.index, y=x.values, marker_color=bar_colors)])
fig.update_layout(width=900, height=500, title='Top 20 origin nationalities :',
                  xaxis_title='Nationality', yaxis_title='Number of Players')
fig.show()
# Country coordinates table, reduced to the join key plus latitude/longitude.
coordinates = (
    pd.read_csv("https://raw.githubusercontent.com/EylonYehiel/projects/main/Data/world_and_usa_states_coordinated.csv")
    [['country', 'latitude', 'longitude']]
    .rename(columns={'country': 'Nationality'})
)
# Rename one country entry to 'England' so it matches the dataset's nationality names.
# NOTE(review): the row is addressed by hard-coded position 73 -- confirm it
# still points at the intended country if the CSV ever changes.
coordinates.iat[73, 0] = 'England'

# Attach coordinates to each player (default inner join on Nationality) and
# keep only the columns needed for the map.
cofifa = pd.merge(fifa, coordinates, on='Nationality')[[
    'Full Name', 'Nationality', 'latitude', 'longitude', 'Overall',
    'Value(in Euro)', 'Age', 'Club Name', 'International Reputation',
]]
cofifa.sample(n=7)
| Full Name | Nationality | latitude | longitude | Overall | Value(in Euro) | Age | Club Name | International Reputation | |
|---|---|---|---|---|---|---|---|---|---|
| 13573 | Shuhei Kawasaki | Japan | 36.204824 | 138.252924 | 63 | 1200000 | 21 | Portimonense SC | 1 |
| 10079 | Gergő Lovrencsics | Hungary | 47.162494 | 19.503304 | 70 | 825000 | 33 | Hajduk Split | 1 |
| 3045 | Sinan Gümüş | Germany | 51.165691 | 10.451526 | 67 | 1000000 | 28 | Antalyaspor | 1 |
| 11658 | Ben Paton | Canada | 56.130366 | -106.346771 | 57 | 350000 | 22 | Ross County FC | 1 |
| 10119 | Kevin Mbabu | Switzerland | 46.818188 | 8.227512 | 77 | 10000000 | 27 | Fulham | 2 |
| 7517 | Edvin Austbø | Norway | 60.472024 | 8.468946 | 54 | 350000 | 17 | Viking FK | 1 |
| 14556 | Călin Popescu | Romania | 45.943161 | 24.966760 | 56 | 325000 | 20 | AFC Hermannstadt | 1 |
# World map with one clustered marker per player, placed at the player's country coordinates.
world_map = folium.Map(location=[0, 0], zoom_start=1.5, width=850, height=500)
mc = MarkerCluster()
for idx, row in cofifa.iterrows():
    # Skip players whose country has no usable coordinates.
    if math.isnan(row['longitude']) or math.isnan(row['latitude']):
        continue
    popup_html = (
        f"<b>Full Name:</b>{row['Full Name']}\n<b>\nClub Name:</b> ({row['Club Name']})"
        f"<br><b>Nationality:</b> {row['Nationality']}"
        f"<br><b>Age:</b> {row['Age']}"
    )
    mc.add_child(Marker([row['latitude'], row['longitude']], popup=popup_html))
world_map.add_child(mc)
# Kept as the last expression so the notebook renders the map.
world_map